gvc_agora_opentargets

Setup environment

library(tidyverse)
library(janitor)
library(broom)
library(readxl)
library(jsonlite)

library(gprofiler2)

# Make the black-and-white theme the default for every ggplot2 plot.
theme_set(theme_bw())

# Fix the random seed so that any stochastic step is reproducible.
set.seed(666)

Read and prep data

GVC

Genes within a 1 Mb window of the GVC loci, provided by Fanny (TODO: confirm whether the window is 1 Mb on each side of a locus or 1 Mb in total):

# Load the GVC comparison workbook, clean the column names, and split the
# `gene_id` column into `gene_id` and `version`. The `version`,
# `agora_nominated_list` and `opentarget_info` columns are then dropped.
gvc <- select(
  separate(
    clean_names(read_xlsx("GVC_1Mb_comparison_050224.xlsx")),
    col = gene_id,
    into = c("gene_id", "version")
  ),
  !c(version, agora_nominated_list, opentarget_info)
)

gvc

# One row per gene ID (the first occurrence is kept), sorted by gene symbol.
gvc.genes <- gvc |>
  select(gene_id, gene_symbol) |>
  distinct(gene_id, .keep_all = TRUE) |>
  arrange(gene_symbol)

gvc.genes

Agora

Alzheimer’s disease gene prioritization scores from Agora (see also related journal article):

# Parse the Agora overall-scores JSON (simplified to vectors) into a tibble.
ago1 <- as_tibble(
  read_json("syn25741025.overall_scores.json", simplifyVector = TRUE)
)

ago1

Alzheimer’s disease genes (AMPAD Agora) from Fanny:

# Gene list read from the AMPAD Agora CSV.
ago2 <- read_csv(file = "AMPAD_agora_032124_gene-list.csv")
ago2
# Keep only the Agora score rows whose HGNC symbol occurs in that gene list.
ago <- filter(ago1, is.element(hgnc_symbol, ago2[["Gene Symbol"]]))

OpenTargets

Alzheimer’s disease gene prioritization scores from OpenTargets:

# Read the OpenTargets export; the literal string "No data" is parsed as NA
# and the column-specification message is suppressed.
ot <- read_tsv(
  file = "OT-MONDO_0004975-associated-targets-6_4_2024-v24_03.tsv",
  na = "No data",
  show_col_types = FALSE
)

ot

Add Ensembl gene IDs by converting the OpenTargets gene symbols with g:Profiler (`gconvert`):

# Map every OpenTargets gene symbol to Ensembl gene IDs with g:Profiler and
# attach the original OpenTargets columns to each mapping.
#
# All matches per symbol are requested (`mthreshold = Inf`), so the result can
# hold several rows for one input symbol; inputs without a target are removed
# (`filter_na = TRUE`).
otcols <- colnames(ot)
otensg <- gconvert(
  query = ot$symbol,
  organism = "hsapiens",
  target = "ENSG",
  mthreshold = Inf,
  filter_na = TRUE
) |>
  # `input_number` is matched against the row number of `ot`, stored as text.
  # NOTE(review): presumably `input_number` is the position of the symbol in
  # `query` -- confirm this also holds when `ot$symbol` contains duplicates.
  mutate(input_number = as.character(input_number)) |>
  left_join(
    ot |> rownames_to_column(var = "input_number"),
    by = "input_number"
  ) |>
  # `otcols` is a character vector that lives outside the data frame, so it is
  # wrapped in all_of(); passing it bare is deprecated in tidyselect.
  select(ensembl_gene_id = target, all_of(otcols))

otensg

Annotate GVC genes with Agora and OpenTargets scores

# Number of GVC gene IDs that also occur among the Agora gene IDs.
length(which(is.element(gvc.genes$gene_id, ago$ensembl_gene_id)))
[1] 116
# Number of GVC gene IDs that also occur among the OpenTargets gene IDs.
length(which(is.element(gvc.genes$gene_id, otensg$ensembl_gene_id)))
[1] 405

Arrange by Agora’s genetics_score and OpenTargets’ otGeneticsPortal:

# Annotate every GVC gene with its Agora and OpenTargets columns (left joins,
# so genes without a match keep NA scores). The redundant symbol columns are
# dropped, the two genetics scores are moved next to the gene identifiers, and
# rows are ranked by Agora genetics score, then OpenTargets genetics score.
d1 <- gvc.genes |>
  left_join(ago, by = c("gene_id" = "ensembl_gene_id")) |>
  left_join(otensg, by = c("gene_id" = "ensembl_gene_id")) |>
  select(!c(symbol, hgnc_symbol)) |>
  relocate(gene_id, gene_symbol, genetics_score, otGeneticsPortal) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal))

d1

Arrange by OpenTargets’ otGeneticsPortal and Agora’s genetics_score:

# GVC genes annotated with Agora and OpenTargets columns, ranked by the
# OpenTargets genetics score first and the Agora genetics score second.
gvc.genes |>
  left_join(ago, by = c("gene_id" = "ensembl_gene_id")) |>
  left_join(otensg, by = c("gene_id" = "ensembl_gene_id")) |>
  select(!c(symbol, hgnc_symbol)) |>
  relocate(gene_id, gene_symbol, otGeneticsPortal, genetics_score) |>
  arrange(desc(otGeneticsPortal), desc(genetics_score))

Arrange by Agora’s target_risk_score and OpenTargets’ globalScore:

# GVC genes annotated with Agora and OpenTargets columns, ranked by Agora's
# target_risk_score first and OpenTargets' globalScore second.
gvc.genes |>
  left_join(ago, by = c("gene_id" = "ensembl_gene_id")) |>
  left_join(otensg, by = c("gene_id" = "ensembl_gene_id")) |>
  select(!c(symbol, hgnc_symbol)) |>
  relocate(gene_id, gene_symbol, target_risk_score, globalScore) |>
  arrange(desc(target_risk_score), desc(globalScore))

Arrange by OpenTargets’ globalScore and Agora’s target_risk_score:

# GVC genes annotated with Agora and OpenTargets columns, ranked by
# OpenTargets' globalScore first and Agora's target_risk_score second.
gvc.genes |>
  left_join(ago, by = c("gene_id" = "ensembl_gene_id")) |>
  left_join(otensg, by = c("gene_id" = "ensembl_gene_id")) |>
  select(!c(symbol, hgnc_symbol)) |>
  relocate(gene_id, gene_symbol, globalScore, target_risk_score) |>
  arrange(desc(globalScore), desc(target_risk_score))

Overlap between GVC genes and Agora and OpenTargets genes

library(VennDiagram)

# The three gene-ID sets that are compared.
x <- list(
  GVC = gvc.genes$gene_id,
  Agora = ago$ensembl_gene_id,
  OpenTargets = otensg$ensembl_gene_id
)

# Build the diagram as a grid object (filename = NULL, so no image file is
# written) and draw it on a fresh page.
grid.newpage()
v <- venn.diagram(
  x,
  filename = NULL,
  fill = c("#FF0000", "#00FF00", "#0000FF")
)
grid.draw(v)

# Table of all Venn partitions with their member gene IDs.
p <- get.venn.partitions(x)
p

Perform over-representation analysis (ORA) of overlap genes

GVC ∩ Agora ∩ OpenTargets

# Genes present in all three sets, ranked by Agora genetics score and then by
# OpenTargets genetics score (both descending); each gene ID is kept once.
query <- p |>
  filter(..set.. == "GVC∩Agora∩OpenTargets") |>
  unnest(..values..) |>
  select(gene_id = ..values..) |>
  left_join(ago, by = c("gene_id" = "ensembl_gene_id")) |>
  left_join(otensg, by = c("gene_id" = "ensembl_gene_id")) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal)) |>
  pull(gene_id) |>
  unique()

# Ordered g:Profiler enrichment, FDR-corrected, threshold 0.005.
# NOTE(review): gost() returns NULL when nothing passes the threshold; the
# table and plot calls below assume a non-NULL result.
gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  significant = TRUE,
  exclude_iea = TRUE,
  user_threshold = 0.005,
  correction_method = "fdr",
  domain_scope = "annotated"
)

relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, interactive = TRUE, capped = FALSE)
# save overlap gene ids for later
overlap_gene_ids <- query

GVC ∩ Agora

# Genes shared by GVC and Agora (inside or outside OpenTargets), ranked by
# Agora genetics score and then OpenTargets genetics score (both descending).
query <- p |>
  filter(
    ..set.. %in% c(
      "GVC∩Agora∩OpenTargets",
      "(GVC∩Agora)∖(OpenTargets)"
    )
  ) |>
  unnest(..values..) |>
  select(gene_id = ..values..) |>
  left_join(ago, by = c("gene_id" = "ensembl_gene_id")) |>
  left_join(otensg, by = c("gene_id" = "ensembl_gene_id")) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal)) |>
  pull(gene_id) |>
  unique()

# Ordered g:Profiler enrichment, FDR-corrected, threshold 0.005.
# NOTE(review): gost() returns NULL when nothing passes the threshold; the
# table and plot calls below assume a non-NULL result.
gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  significant = TRUE,
  exclude_iea = TRUE,
  user_threshold = 0.005,
  correction_method = "fdr",
  domain_scope = "annotated"
)

relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, interactive = TRUE, capped = FALSE)

GVC ∩ OpenTargets

# Genes shared by GVC and OpenTargets (inside or outside Agora), ranked by
# Agora genetics score and then OpenTargets genetics score (both descending).
query <- p |>
  filter(
    ..set.. %in% c(
      "GVC∩Agora∩OpenTargets",
      "(GVC∩OpenTargets)∖(Agora)"
    )
  ) |>
  unnest(..values..) |>
  select(gene_id = ..values..) |>
  left_join(ago, by = c("gene_id" = "ensembl_gene_id")) |>
  left_join(otensg, by = c("gene_id" = "ensembl_gene_id")) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal)) |>
  pull(gene_id) |>
  unique()

# Ordered g:Profiler enrichment, FDR-corrected, threshold 0.005.
# NOTE(review): gost() returns NULL when nothing passes the threshold; the
# table and plot calls below assume a non-NULL result.
gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  significant = TRUE,
  exclude_iea = TRUE,
  user_threshold = 0.005,
  correction_method = "fdr",
  domain_scope = "annotated"
)

relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, interactive = TRUE, capped = FALSE)

Agora ∩ OpenTargets

# Genes shared by Agora and OpenTargets (inside or outside GVC), ranked by
# Agora genetics score and then OpenTargets genetics score (both descending).
query <- p |>
  filter(
    ..set.. %in% c(
      "GVC∩Agora∩OpenTargets",
      "(Agora∩OpenTargets)∖(GVC)"
    )
  ) |>
  unnest(..values..) |>
  select(gene_id = ..values..) |>
  left_join(ago, by = c("gene_id" = "ensembl_gene_id")) |>
  left_join(otensg, by = c("gene_id" = "ensembl_gene_id")) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal)) |>
  pull(gene_id) |>
  unique()

# Ordered g:Profiler enrichment, FDR-corrected, threshold 0.005.
# NOTE(review): gost() returns NULL when nothing passes the threshold; the
# table and plot calls below assume a non-NULL result.
gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  significant = TRUE,
  exclude_iea = TRUE,
  user_threshold = 0.005,
  correction_method = "fdr",
  domain_scope = "annotated"
)

relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, interactive = TRUE, capped = FALSE)

(GVC ∩ Agora) ∪ (GVC ∩ OpenTargets) ∪ (Agora ∩ OpenTargets)

# Genes found in at least two of the three sets, ranked by Agora genetics
# score and then OpenTargets genetics score (both descending).
query <- p |>
  filter(
    ..set.. %in% c(
      "GVC∩Agora∩OpenTargets",
      "(GVC∩Agora)∖(OpenTargets)",
      "(GVC∩OpenTargets)∖(Agora)",
      "(Agora∩OpenTargets)∖(GVC)"
    )
  ) |>
  unnest(..values..) |>
  select(gene_id = ..values..) |>
  left_join(ago, by = c("gene_id" = "ensembl_gene_id")) |>
  left_join(otensg, by = c("gene_id" = "ensembl_gene_id")) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal)) |>
  pull(gene_id) |>
  unique()

# Ordered g:Profiler enrichment, FDR-corrected, threshold 0.005.
# NOTE(review): gost() returns NULL when nothing passes the threshold; the
# table and plot calls below assume a non-NULL result.
gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  significant = TRUE,
  exclude_iea = TRUE,
  user_threshold = 0.005,
  correction_method = "fdr",
  domain_scope = "annotated"
)

relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, interactive = TRUE, capped = FALSE)

(Agora ∩ OpenTargets) ∖ (GVC)

# Genes in both Agora and OpenTargets but not in GVC, ranked by Agora
# genetics score and then OpenTargets genetics score (both descending).
query <- p |>
  filter(..set.. == "(Agora∩OpenTargets)∖(GVC)") |>
  unnest(..values..) |>
  select(gene_id = ..values..) |>
  left_join(ago, by = c("gene_id" = "ensembl_gene_id")) |>
  left_join(otensg, by = c("gene_id" = "ensembl_gene_id")) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal)) |>
  pull(gene_id) |>
  unique()

# Ordered g:Profiler enrichment, FDR-corrected, threshold 0.005.
# NOTE(review): gost() returns NULL when nothing passes the threshold; the
# table and plot calls below assume a non-NULL result.
gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  significant = TRUE,
  exclude_iea = TRUE,
  user_threshold = 0.005,
  correction_method = "fdr",
  domain_scope = "annotated"
)

relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, interactive = TRUE, capped = FALSE)

(GVC ∩ OpenTargets) ∖ (Agora)

# Genes in both GVC and OpenTargets but not in Agora. None of them has a row
# in `ago`, so the Agora sort key is NA throughout and the ranking is
# effectively by the OpenTargets genetics score (descending).
query <- p |>
  filter(..set.. == "(GVC∩OpenTargets)∖(Agora)") |>
  unnest(..values..) |>
  select(gene_id = ..values..) |>
  left_join(ago, by = c("gene_id" = "ensembl_gene_id")) |>
  left_join(otensg, by = c("gene_id" = "ensembl_gene_id")) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal)) |>
  pull(gene_id) |>
  unique()

# Ordered g:Profiler enrichment, FDR-corrected, threshold 0.005.
# NOTE(review): gost() returns NULL when nothing passes the threshold; the
# table and plot calls below assume a non-NULL result.
gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  significant = TRUE,
  exclude_iea = TRUE,
  user_threshold = 0.005,
  correction_method = "fdr",
  domain_scope = "annotated"
)

relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, interactive = TRUE, capped = FALSE)

(OpenTargets) ∖ (GVC ∪ Agora)

# Genes found only in OpenTargets. None of them has a row in `ago`, so the
# Agora sort key is NA throughout and the ranking is effectively by the
# OpenTargets genetics score (descending).
query <- p |>
  filter(..set.. == "(OpenTargets)∖(GVC∪Agora)") |>
  unnest(..values..) |>
  select(gene_id = ..values..) |>
  left_join(ago, by = c("gene_id" = "ensembl_gene_id")) |>
  left_join(otensg, by = c("gene_id" = "ensembl_gene_id")) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal)) |>
  pull(gene_id) |>
  unique()

# Ordered g:Profiler enrichment, FDR-corrected, threshold 0.005.
# NOTE(review): gost() returns NULL when nothing passes the threshold; the
# table and plot calls below assume a non-NULL result.
gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  significant = TRUE,
  exclude_iea = TRUE,
  user_threshold = 0.005,
  correction_method = "fdr",
  domain_scope = "annotated"
)

relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, interactive = TRUE, capped = FALSE)

(GVC ∩ Agora) ∖ (OpenTargets)

# Genes in both GVC and Agora but not in OpenTargets. None of them has a row
# in `otensg`, so the OpenTargets sort key is NA throughout and the ranking is
# effectively by the Agora genetics score (descending).
query <- p |>
  filter(..set.. == "(GVC∩Agora)∖(OpenTargets)") |>
  unnest(..values..) |>
  select(gene_id = ..values..) |>
  left_join(ago, by = c("gene_id" = "ensembl_gene_id")) |>
  left_join(otensg, by = c("gene_id" = "ensembl_gene_id")) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal)) |>
  pull(gene_id) |>
  unique()

# Ordered g:Profiler enrichment, FDR-corrected, threshold 0.005.
# NOTE(review): gost() returns NULL when nothing passes the threshold; the
# table and plot calls below assume a non-NULL result.
gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  significant = TRUE,
  exclude_iea = TRUE,
  user_threshold = 0.005,
  correction_method = "fdr",
  domain_scope = "annotated"
)

relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, interactive = TRUE, capped = FALSE)

(Agora) ∖ (GVC ∪ OpenTargets)

# Genes found only in Agora. None of them has a row in `otensg`, so the
# OpenTargets sort key is NA throughout and the ranking is effectively by the
# Agora genetics score (descending).
query <- p |>
  filter(..set.. == "(Agora)∖(GVC∪OpenTargets)") |>
  unnest(..values..) |>
  select(gene_id = ..values..) |>
  left_join(ago, by = c("gene_id" = "ensembl_gene_id")) |>
  left_join(otensg, by = c("gene_id" = "ensembl_gene_id")) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal)) |>
  pull(gene_id) |>
  unique()

# Ordered g:Profiler enrichment, FDR-corrected, threshold 0.005.
# NOTE(review): gost() returns NULL when nothing passes the threshold; the
# table and plot calls below assume a non-NULL result.
gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  significant = TRUE,
  exclude_iea = TRUE,
  user_threshold = 0.005,
  correction_method = "fdr",
  domain_scope = "annotated"
)

relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, interactive = TRUE, capped = FALSE)

(GVC) ∖ (Agora ∪ OpenTargets)

# ORA of the GVC genes that occur in neither the Agora nor the OpenTargets set.
#
# The Venn sets are built from `ago$ensembl_gene_id` and
# `otensg$ensembl_gene_id`, so no gene in this partition has a row in either
# table: joining them would only add NA scores and sorting on those scores
# would leave the rows in the partition's incidental order. There is therefore
# nothing to rank by, and the enrichment is run as an unordered query rather
# than as an incremental (ordered) one.
query <- p |>
  filter(..set.. == "(GVC)∖(Agora∪OpenTargets)") |>
  unnest(..values..) |>
  select(gene_id = ..values..) |>
  distinct(gene_id) |>
  pull(gene_id)

# NOTE(review): gost() returns NULL when nothing passes the threshold; the
# table and plot calls below assume a non-NULL result.
gostres <- gost(query = query,
                organism = "hsapiens",
                domain_scope = "annotated",
                exclude_iea = TRUE,
                ordered_query = FALSE,
                significant = TRUE,
                user_threshold = 0.005,
                correction_method = "fdr")

gostres$result |> select(term_name, term_id, source, everything())
gostplot(gostres, capped = FALSE, interactive = TRUE)

Perform ORA of GVC genes sorted by Agora or OpenTargets genetics scores

Agora

# All GVC gene IDs (each once), ranked by Agora genetics score; genes without
# a score sort last.
query <- unique(arrange(d1, desc(genetics_score))$gene_id)

# Ordered g:Profiler enrichment, FDR-corrected, threshold 0.005.
# NOTE(review): gost() returns NULL when nothing passes the threshold; the
# table and plot calls below assume a non-NULL result.
gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  significant = TRUE,
  exclude_iea = TRUE,
  user_threshold = 0.005,
  correction_method = "fdr",
  domain_scope = "annotated"
)

relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, interactive = TRUE, capped = FALSE)

OpenTargets

# All GVC gene IDs (each once), ranked by OpenTargets genetics score; genes
# without a score sort last.
query <- unique(arrange(d1, desc(otGeneticsPortal))$gene_id)

# Ordered g:Profiler enrichment, FDR-corrected, threshold 0.005.
# NOTE(review): gost() returns NULL when nothing passes the threshold; the
# table and plot calls below assume a non-NULL result.
gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  significant = TRUE,
  exclude_iea = TRUE,
  user_threshold = 0.005,
  correction_method = "fdr",
  domain_scope = "annotated"
)

relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, interactive = TRUE, capped = FALSE)

Correlation of Agora and OpenTargets scores in GVC genes

# Total number of rows in the annotated GVC table.
nrow(d1)
[1] 1345
# Rows that have both genetics scores.
nrow(drop_na(d1, genetics_score, otGeneticsPortal))
[1] 56
# Kendall rank correlation between the two genetics scores, computed on the
# rows where both are present and returned as a one-row tidy table.
d1 |>
  drop_na(genetics_score, otGeneticsPortal) |>
  summarize(
    tidy(cor.test(genetics_score, otGeneticsPortal, method = "kendall"))
  )
# Total number of rows in the annotated GVC table.
nrow(d1)
[1] 1345
# Rows that have both overall scores.
nrow(drop_na(d1, target_risk_score, globalScore))
[1] 75
# Kendall rank correlation between Agora's target_risk_score and OpenTargets'
# globalScore, computed on the rows where both are present and returned as a
# one-row tidy table.
d1 |>
  drop_na(target_risk_score, globalScore) |>
  summarize(
    tidy(cor.test(target_risk_score, globalScore, method = "kendall"))
  )

Correlation of Agora and OpenTargets scores overall

# Every Agora gene with its OpenTargets columns attached (NA where there is no
# match), ranked by Agora genetics score and then OpenTargets genetics score.
d2 <- left_join(ago, otensg, by = join_by(ensembl_gene_id)) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal))

d2
# Total number of rows in the Agora-by-OpenTargets table.
nrow(d2)
[1] 926
# Rows that have both genetics scores.
nrow(drop_na(d2, genetics_score, otGeneticsPortal))
[1] 75
# Kendall rank correlation between the two genetics scores, computed on the
# rows where both are present and returned as a one-row tidy table.
d2 |>
  drop_na(genetics_score, otGeneticsPortal) |>
  summarize(
    tidy(cor.test(genetics_score, otGeneticsPortal, method = "kendall"))
  )
# Total number of rows in the Agora-by-OpenTargets table.
nrow(d2)
[1] 926
# Rows that have both overall scores.
nrow(drop_na(d2, target_risk_score, globalScore))
[1] 484
# Kendall rank correlation between Agora's target_risk_score and OpenTargets'
# globalScore, computed on the rows where both are present and returned as a
# one-row tidy table.
d2 |>
  drop_na(target_risk_score, globalScore) |>
  summarize(
    tidy(cor.test(target_risk_score, globalScore, method = "kendall"))
  )